
******* Build the auxiliary data sets

* Oblast code conversion table: read the Excel sheet and store it as a
* Stata file sorted on regbir, the key used for the merge further below
import excel "$data94/birthplace_codes_94.xlsx", sheet("Sheet1") cellrange(A1:G99) firstrow clear
sort regbir
save "$datatemp/birthplace_codes_94.dta", replace

* Covariates to attach to the 1994 microcensus: load only the merge keys
* and the six covariate series, then store them sorted on the merge keys
use Birthplace_code Year trade concrete brick meat timber canned ///
    using "$datatemp/rosstat_allvars.dta", clear
rename Year year
sort Birthplace_code year
save "$datatemp/covars.dta", replace

* Stack the four 1994 microcensus extracts into one file
use "$data94/Freq_0_5.dta", clear
foreach part in 1_0 2_0a 2_0b {
    append using "$data94/Freq_`part'.dta"
}
save "$datatemp/census94combined.dta", replace

use "$datatemp/census94combined.dta", clear
* Drop republics: each listed code is removed with both trailing digits (1 and 2)
* NOTE(review): 8888 and 9999 look like unknown/other codes rather than republics - confirm
foreach code of numlist 1100 1300 1500 1700 1900 2200 2400 2600 2800 3100 3300 3500 3700 3900 4200 8888 9999 {
    drop if inlist(regbir2, `code'1, `code'2)
}

* Split regbir2: its last digit becomes urban, the leading digits become regbir
local code_str string(regbir2)
gen urban = substr(`code_str', -1, 1)
gen regbir = substr(`code_str', 1, length(`code_str') - 1)
destring urban regbir, replace

* Attach the oblast conversion codes and keep matched records only
sort regbir
merge (regbir) using "$datatemp/birthplace_codes_94.dta"
drop if _merge == 1 | _merge == 2
drop _merge
assert regbir != .
save "$datatemp/census94combined1.dta", replace

******* Analysis sample and birth-interval variables
use "$datatemp/census94combined1.dta", clear
* Keep women
keep if se2 == 2

* Interval between consecutive births, in months and then in years, built
* from the birth year and birth month of each child (children 2 to 7)
forvalues k = 2/7 {
    local prev = `k' - 1
    * Month count between the two birth dates: 12 * year gap + month gap
    gen intervalmn`k' = (ch`k'biy - ch`prev'biy)*12 + (ch`k'bim - ch`prev'bim)
    * No interval when it is zero with identical birth months, or when
    * either birth year is coded 0
    replace intervalmn`k' = . if (intervalmn`k' == 0 & ch`prev'bim == ch`k'bim) | ch`k'biy == 0 | ch`prev'biy == 0
    * Remove records where a later child has an earlier birth year
    drop if ch`k'biy < ch`prev'biy & ch`k'biy != 0
    gen intervalyr`k' = intervalmn`k'/12
}

* First child: interval measured from the first marriage date (mar1ye, mar1mo)
gen intervalmn1 = (ch1biy - mar1ye)*12 + (ch1bim - mar1mo)
* Zero interval when the marriage year is coded 0 or falls after the birth year
replace intervalmn1 = 0 if (mar1ye == 0 | mar1ye > ch1biy) & ch1biy != 0
* No interval when the first birth year is coded 0
replace intervalmn1 = . if ch1biy == 0
* Intervals in [-1, 0) are set to zero
replace intervalmn1 = 0 if intervalmn1 >= -1 & intervalmn1 < 0
gen intervalyr1 = intervalmn1/12

* Create a unique identifier for every woman.
* Stored as long: the default float type cannot represent every integer
* above 16,777,216, so row numbers would stop being unique in a large file
* and reshape would reject the i() variable.
gen long myid = _n
* Give the child birth year/month variables a common stub for the reshape
forvalues j=1/7 {
    rename ch`j'biy year`j'
    rename ch`j'bim month`j'
}
* Make the unit a mother-child observation (numkid = birth order 1..7)
* NOTE(review): no married_bir# variables are created in this script; they
* presumably come from the input data - confirm
reshape long year month married_bir intervalmn intervalyr, i(myid) j(numkid)
* Merge co-variates from publicly available data on birthplace code and birth year
sort Birthplace_code year
merge (Birthplace_code year) using "$datatemp/covars.dta"
* Keep matched births from 1970 onwards with a birth year not coded 0
drop if year==0
keep if year>=1970
keep if _merge==3

* Age at the birth of each child: birth year minus darg2, less one more
* year when the child's birth month is earlier than darm2
gen age_bir = year - darg2 - (month < darm2)
replace age_bir = . if year == 0

* Sample restrictions: drop births with year coded 0, ages above 80
* (this also removes missing ages), darg2 or ed2 coded 0, and income
* above 400000 (this also removes missing income)
drop if year == 0 | age_bir > 80 | darg2 == 0 | ed2 == 0 | income > 400000

* Analysis window: births from 1975 through 1986
keep if inrange(year, 1975, 1986)

* Treatment indicator and period dummies; 1978-1980 has no dummy of its own
gen treat = (loc == 1)
gen pre = inrange(year, 1975, 1977)
gen post1 = (year == 1981)
gen post2 = (year == 1982)
gen post3 = inrange(year, 1983, 1986)
* Interactions of each period dummy with treatment
foreach period in pre post1 post2 post3 {
    gen `period'_treat = `period'*treat
}
gen year_treat = year*treat

set more off
* outreg2 write mode used when exporting the regression table
local r replace

*************** Regressions for table 4
* Both regressions use birthplace and birth-year fixed effects, restrict the
* sample to second and third births, and cluster standard errors on
* Birthplace_code. The reported dependent-variable mean is taken over the
* estimation sample for loc==1 in 1980.

* Create the fixed-effect dummies (_IBirthplac* and _Iyear*) that the
* regressions reference; nothing earlier in the script generates them.
xi i.Birthplace_code i.year

* Dependent variable: age at birth
regress age_bir pre_treat post1_treat post2_treat post3_treat pre post1 post2 post3 _IBirthplac* _Iyear* trade concrete brick meat timber canned if numkid>=2 & numkid<=3, vce(cluster Birthplace_code)
sum age_bir if e(sample) & loc==1 & year==1980 & numkid>=2 & numkid<=3
* The Name row carries the dependent variable of the regression just run
outreg2 using "$output/table4.xls", `r' addstat (Dep var mean, r(mean), Num oblasts, e(N_clust)) addtext(Name, "`e(depvar)'") bracket noaster ctitle(`e(cmdline)')
* Later columns must be appended, otherwise they overwrite the first one
local r append

* Dependent variable: interval since last birth
regress intervalyr pre_treat post1_treat post2_treat post3_treat pre post1 post2 post3 _IBirthplac* _Iyear* trade concrete brick meat timber canned if numkid>=2 & numkid<=3, vce(cluster Birthplace_code)
sum intervalyr if e(sample) & loc==1 & year==1980 & numkid>=2 & numkid<=3
outreg2 using "$output/table4.xls", `r' addstat (Dep var mean, r(mean), Num oblasts, e(N_clust)) addtext(Name, "`e(depvar)'") bracket noaster ctitle(`e(cmdline)')



 